#####################
# Sleep Project - Pedro Bessone, Gautam Rao, Heather Schofield, Frank Schilbach, and Mattie Toma
# Purpose: Provides adjusted p-values for table 8 from the Appendix (Main Treatment Effects, Pooling Night-Sleep Treatments and Including Multiple Hypothesis Testing Corrections). 
#          Takes both the unadjusted p-values from table_A7.do and the RI p-values from table_A7_RI.do, performs the
#          step-down procedure, and exports the adjusted p-values.
# Last edited: 07 May 2021
#####################

rm(list = ls())

options(scipen = 99)

require(rio)
require(tidyverse)

# Load function that performs step-down adjustment 
source("Scripts/adjust_pval_step_down_fun.R")

# Load actual p-values
df.p = import("Datasets/pvals_unadjusted_table_int_pooled.csv") %>% 
  as_tibble() %>% 
  rename(p_val_nap = p_val_nap21)

colnames(df.p) = str_remove(colnames(df.p), pattern = "1")

# Divide them into the families that we want to correct p-values
df.family = df.p %>% 
  filter( var_name %in%  c("earnings", "wellbeing", "cogindex", "pref") )

df.work = df.p %>% 
  filter( var_name %in%  c("prod", "typing", "output"))

df.wb = df.p %>% 
  filter( var_name %in%  c("physical", "psych"))

df.cog = df.p %>% 
  filter( var_name %in%  c("cogfunction", "attention"))

df.pref = df.p %>% 
  filter( var_name %in%  c("timepref", "social", "risk"))

# Load RI p-vals/t-stats
df.ri.pvals.raw = import("Datasets/mht_ri_pvals_table_int_pool.dta") %>% as_tibble()

# Keep only pvals and t-stats
df.ri.pvals = df.ri.pvals.raw %>% 
  select(contains(c("tstat_", "p_"))) %>% 
  select(!contains(c("break", "work")))

# Transform t-stats into p-vals
t_to_pval = function(t){
  pval = 2*pnorm(-abs(t))
}

df.ri.pvals = df.ri.pvals %>% 
  mutate(across(.cols = contains("tstat_"), .fns = t_to_pval))

# Rename t_stats to p_vals 
colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "tstat_")] = 
  str_replace(colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "tstat_")], pattern = "tstat_", replacement = "p_")

# Rename name of naps from nap2 to nap
colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "nap2")] = 
  str_replace(colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "nap2")], pattern = "nap2", replacement = "nap")

# Divide them into the families that we want to correct p-values
df.family.RI = df.ri.pvals %>% 
  select(contains(c("p_earnings", "p_wellbeing", "p_cogindex", "p_pref")))

df.work.RI = df.ri.pvals %>% 
  select(contains(c("prod", "typing", "output")))

df.wb.RI = df.ri.pvals %>% 
  select(contains(c("physical", "psych")))

df.cog.RI = df.ri.pvals %>% 
  select(contains(c("cogfunction", "attention")))

df.pref.RI = df.ri.pvals %>% 
  select(contains(c("timepref", "social", "risk")))

# Function to get p-vals for different comparisons
get_adjust_pvals = function(df.original, df.RI){
  ### Correct family-wise outcomes ####
  # Night-Sleep vs. Control
  p_val_original = df.original$p_val_ns
  p_val_RI_mat   = df.RI %>% select(contains("_ns")) %>% select(!contains(c("_d_")))  %>% as.matrix
  p_val_mht_ns_cont = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Nap vs. Control
  p_val_original = df.original$p_val_nap
  p_val_RI_mat   = df.RI %>% select(contains("nap")) %>% select(!contains(c("_d_"))) %>% as.matrix
  p_val_mht_nap_cont = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)

  # Night-Sleep + Nap vs. Control
  p_val_original = df.original$p_val_int
  p_val_RI_mat   = df.RI %>% select(contains("int")) %>% select(!contains(c("_d_"))) %>% as.matrix
  p_val_mht_int_cont    = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Night-Sleep vs. Nap
  p_val_original = df.original$p_val_d_nap_ns
  p_val_RI_mat   = df.RI %>% select(contains("_d_nap_ns"))  %>% as.matrix
  p_val_mht_nap_ns = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)

  # Night-Sleep + Nap vs. Night-Sleep
  p_val_original = df.original$p_val_d_int_ns
  p_val_RI_mat   = df.RI %>% select(contains("_d_int_ns"))  %>% as.matrix
  p_val_mht_int_ns = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Night-Sleep + Nap vs. Nap
  p_val_original = df.original$p_val_d_int_nap
  p_val_RI_mat   = df.RI %>% select(contains("_d_int_nap"))  %>% as.matrix
  p_val_mht_int_nap = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
    
  df.res = tibble(
    comparison = rep(c("NS vs. Control","Nap vs. Control", "Interaction vs. Control", "Nap vs. NS","Interaction vs. NS","Interaction vs. Nap"),rep(nrow(df.original),6)),
    outcome = rep(df.original$var_name, 6),
    p_val_original = c(df.original$p_val_ns, df.original$p_val_nap, df.original$p_val_int, df.original$p_val_d_nap_ns, df.original$p_val_d_int_ns, df.original$p_val_d_int_nap),
    p_val_adj = c(p_val_mht_ns_cont, p_val_mht_nap_cont, p_val_mht_int_cont, p_val_mht_nap_ns, p_val_mht_int_ns, p_val_mht_int_nap)
    )
  
  return(df.res)
  
}

# Correction across family-wide outcomes
df.pval.adj.family = get_adjust_pvals(df.family, df.family.RI)
# Correction across work outcomes
df.pval.adj.work = get_adjust_pvals(df.work, df.work.RI)
# Correction across well-being outcomes
df.pval.adj.wb = get_adjust_pvals(df.wb, df.wb.RI)
# Correction across cognition outcomes
df.pval.adj.cog = get_adjust_pvals(df.cog, df.cog.RI)
# Correction across family-wide outcomes
df.pval.adj.pref = get_adjust_pvals(df.pref, df.pref.RI)

df.pval.all = bind_rows(
  df.pval.adj.family,
  df.pval.adj.work,
  df.pval.adj.wb,
  df.pval.adj.cog,
  df.pval.adj.pref
  )

export(df.pval.all, "Datasets/pval_adj_table_int_pooled.csv")





